library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
## Loading required package: tidyverse
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.0     √ purrr   0.2.5
## √ tibble  2.0.1     √ dplyr   0.7.8
## √ tidyr   0.8.2     √ stringr 1.3.1
## √ readr   1.3.1     √ forcats 0.3.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x dplyr::first()           masks xts::first()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x dplyr::last()            masks xts::last()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(readr)
library(timetk)
library(tidyverse)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(vars)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: strucchange
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
## Loading required package: urca
## Loading required package: lmtest
library(ggfortify)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
theme_set(theme_bw() + 
              theme(strip.text.x = element_text(hjust = 0),
                    strip.text.y = element_text(hjust = 1),
                    strip.background = element_blank()))

Problem 1

  1. Use tq_get with get = "economic.data" option to obtain the following time series for the period 1950Q1-2017Q4 from FRED: U.S. real GDP GDPC1, and GDP deflator GDPDEF. Then, use tq_get with get = "stock.prices" to obtain the data for the adjusted closing value of S&P 500 Index ^GSPC for the period 1950-01-01 to 2017-12-31 from Yahoo Finance. Construct the quarterly average values of the closing price of S&P 500 Index.

  2. Use the data from (a) to construct the following two time series: \[ dlrGDP_t = 400 \Delta\log GDP_t \] which approximates the annualized growth rate of the U.S. real GDP and \[ dlrSP500_t = 100 (\Delta \log SP500_t - \Delta \log GDPDEF_t) \] which approximates the inflation adjusted annual return of S&P 500.

gdp_raw<- tq_get("GDPC1", get = "economic.data", from = "1950-01-01", to = "2017-12-31") %>% 
           rename(rGDP = price) %>%
          mutate(dlrGDP = 400*(log(rGDP) - lag(log(rGDP))))

gdpdef_raw <- tq_get("GDPDEF", get = "economic.data", from = "1950-01-01", to = "2017-12-31") %>% 
              rename(defGDP = price) %>%
              mutate(dldefGDP=log(defGDP) - lag(log(defGDP)))

index_raw <- tq_get("^GSPC", from = "1950-01-01", to = "2017-12-31") %>%
              dplyr::select(date, adjusted) %>%
              mutate(qtryear = as.yearqtr(date)) %>%
              group_by(qtryear) %>%
              summarise(SP500 = mean(adjusted)) %>%
              ungroup() %>% 
              rename(index = SP500) %>%
              mutate(dlindex=log(index) - lag(log(index))) %>% 
              mutate(date = as.Date(qtryear))
y_raw <- inner_join(gdpdef_raw, index_raw,by = "date") %>% 
         mutate(dlrsp500= 100*(dlindex-dldefGDP))
y_tbl <- inner_join(y_raw,gdp_raw,by = "date")
<<<<<<< HEAD


#the following is better 
#  y_tbl <- gdpdef_raw %>% 
#          inner_join(index_raw, by = "date") %>% 
#          inner_join(gdp_raw, by = "date") %>%
#          mutate(dlrsp500= 100*(dlindex-dldefGDP)) 
=======
>>>>>>> master
y_tbl     
  1. Estimate a bivariate reduced form VAR for \(\mathbf y_t = (dlrSP500_t, dlrGDP_t)'\) for the period 1990Q1-2018Q4, use information criteria to select number of lags. How large is the correlation of residuals in the model, and what are the implications for IRFs and FEVDs based on Choleski decomposition?
# convert the data into ts
y.ts <-
   y_tbl %>%
    dplyr::select(date, dlrsp500,dlrGDP) %>%
    filter(date >= "1990-01-01" & date <= "2018-10-01") %>% 
    tk_ts(select = c("dlrsp500","dlrGDP"), start = 1990, frequency = 4)
# load package that allows to estimate and analyze VAR models
library(vars)
VARselect(y.ts, lag.max = 8, type = "const")    
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      1      2 
## 
## $criteria
##                 1          2          3          4          5          6
## AIC(n)   4.811091   4.757819   4.818544   4.887293   4.957495   4.990404
## HQ(n)    4.872898   4.860831   4.962761   5.072714   5.184120   5.258234
## SC(n)    4.963653   5.012087   5.174520   5.344976   5.516885   5.651502
## FPE(n) 122.869586 116.508857 123.835242 132.709300 142.463120 147.382725
##                 7          8
## AIC(n)   5.050904   5.103155
## HQ(n)    5.359939   5.453395
## SC(n)    5.813709   5.967668
## FPE(n) 156.797499 165.517021
# estimate VAR(p) using AIC to select p
varp <- VAR(y.ts, ic = "AIC", lag.max = 8, type = "const")  
varp                      
## 
## VAR Estimation Results:
## ======================= 
## 
## Estimated coefficients for equation dlrsp500: 
## ============================================= 
## Call:
## dlrsp500 = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const 
## 
## dlrsp500.l1   dlrGDP.l1 dlrsp500.l2   dlrGDP.l2       const 
##  0.40630346  0.23059717 -0.08858555 -0.24545554  0.95042083 
## 
## 
## Estimated coefficients for equation dlrGDP: 
## =========================================== 
## Call:
## dlrGDP = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const 
## 
## dlrsp500.l1   dlrGDP.l1 dlrsp500.l2   dlrGDP.l2       const 
##  0.10696945  0.17842715  0.02214091  0.15741572  1.43232603
summary(varp)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: dlrsp500, dlrGDP 
## Deterministic variables: const 
## Sample size: 110 
## Log Likelihood: -567.134 
## Roots of the characteristic polynomial:
## 0.4107 0.4107 0.3331 0.1515
## Call:
## VAR(y = y.ts, type = "const", lag.max = 8, ic = "AIC")
## 
## 
## Estimation results for equation dlrsp500: 
## ========================================= 
## dlrsp500 = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const 
## 
##             Estimate Std. Error t value Pr(>|t|)    
## dlrsp500.l1  0.40630    0.10570   3.844 0.000208 ***
## dlrGDP.l1    0.23060    0.27937   0.825 0.411002    
## dlrsp500.l2 -0.08859    0.10909  -0.812 0.418593    
## dlrGDP.l2   -0.24546    0.26602  -0.923 0.358276    
## const        0.95042    0.89781   1.059 0.292209    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 5.639 on 105 degrees of freedom
## Multiple R-Squared: 0.1765,  Adjusted R-squared: 0.1452 
## F-statistic: 5.628 on 4 and 105 DF,  p-value: 0.0003825 
## 
## 
## Estimation results for equation dlrGDP: 
## ======================================= 
## dlrGDP = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const 
## 
##             Estimate Std. Error t value Pr(>|t|)    
## dlrsp500.l1  0.10697    0.03968   2.696  0.00818 ** 
## dlrGDP.l1    0.17843    0.10487   1.701  0.09183 .  
## dlrsp500.l2  0.02214    0.04095   0.541  0.58987    
## dlrGDP.l2    0.15742    0.09986   1.576  0.11794    
## const        1.43233    0.33702   4.250 4.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.117 on 105 degrees of freedom
## Multiple R-Squared: 0.2526,  Adjusted R-squared: 0.2242 
## F-statistic: 8.873 on 4 and 105 DF,  p-value: 3.274e-06 
## 
## 
## 
## Covariance matrix of residuals:
##          dlrsp500 dlrGDP
## dlrsp500   31.796  5.413
## dlrGDP      5.413  4.480
## 
## Correlation matrix of residuals:
##          dlrsp500 dlrGDP
## dlrsp500   1.0000 0.4535
## dlrGDP     0.4535 1.0000
# IRFs - based on Choleski decomposition of variance-covariance matrix var(e)
varp.irfs <- irf(varp, n.ahead = 40)
# plot IRFs using plot from vars package
par(mfcol=c(2,2), cex = 0.6)
plot(varp.irfs, plot.type = "single")
<<<<<<< HEAD

=======

>>>>>>> master
# FEVD - based on Choleski decomposition of variance-covariance matrix var(e)
varp.fevd <- fevd(varp, n.ahead = 40)
varp.fevd[[1]][c(1,4,8,40),]
##       dlrsp500      dlrGDP
## [1,] 1.0000000 0.000000000
## [2,] 0.9936345 0.006365496
## [3,] 0.9933203 0.006679714
## [4,] 0.9933201 0.006679877
varp.fevd[[2]][c(1,4,8,40),]
##       dlrsp500    dlrGDP
## [1,] 0.2056676 0.7943324
## [2,] 0.3541008 0.6458992
## [3,] 0.3576050 0.6423950
## [4,] 0.3576052 0.6423948
plot(varp.fevd)

plot(varp.fevd, addbars=8)

Comments: if Use AIC, we can select the number of lags as 2, which is consistent with the VARselect result. The correlation of residuals in the model is 0.4535. For IRFs, from “Orthogonal Impulse Response from dlrGDP” on dlrsp500, it shows the shock from the dlrGDP doesn’t significantly affect dlrGDP. For the opposite direction, “Orthogonal Impulse Response from dlrsp500” on dlrGDP, since the 95% confidence interval is far away from 0, so there is a positive significant effect of a shock for dlrsp500 on dlrGDP. What’s more, this positive effect last until somewhere around 4 or 5 quarters, namely for about 1 year, so it doesn’t last for quite a long time. For FEVD, the result shows that the shock in dlrGDP will not influence dlrsp500, the shock in dlrsp500 will influence dlrGDP for 40%.

  1. Run the Granger causality tests for both variables. What do the results suggest about the predictive power of the two variables? Discuss the economic intution behind your results of Granger causality test.
causality(varp, cause = "dlrGDP") 
## $Granger
## 
##  Granger causality H0: dlrGDP do not Granger-cause dlrsp500
## 
## data:  VAR object varp
## F-Test = 0.61496, df1 = 2, df2 = 210, p-value = 0.5416
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: dlrGDP and dlrsp500
## 
## data:  VAR object varp
## Chi-squared = 18.764, df = 1, p-value = 1.479e-05
causality(varp, cause = "dlrsp500")  
## $Granger
## 
##  Granger causality H0: dlrsp500 do not Granger-cause dlrGDP
## 
## data:  VAR object varp
## F-Test = 4.3841, df1 = 2, df2 = 210, p-value = 0.01364
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: dlrsp500 and dlrGDP
## 
## data:  VAR object varp
## Chi-squared = 18.764, df = 1, p-value = 1.479e-05

Comments: (1)the hull hypothesis is dlrGDP is not the granger causing of dlrsp500, i.e. the lags of dlrGDP is not statistically significant in dlrsp500 equation, p-value = 0.5416 shows that we fail to reject the null hypothesis, so they should be jointly 0. we can check from the result of summary(varp), dlrsp500 = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const, dlrGDP.l1 and dlrGDP.l2 don’t affect affect dlrsp500 significantly. (2) the null hypothesis is dlrsp500 is not the granger causing of dlrGDP, i.e. the lags of dlrsp500 is not statistically significant in dlrGD equation, p-value = 0.01364 shows that we have to reject the null hypothesis, so they cannot jointly be 0. we can check from the result of summary(varp), dlrGDP = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const, dlrsp500.l1 affect affect dlrsp500 significantly.

  1. Estimate a restricted VAR model in which you remove lags based on Granger causality test from (d).
# define a  matrix with restictions, and then use resitrict order manually
mat.r <- matrix(1, nrow = 2, ncol = 5)
mat.r[1, c(2,4)] <- 0   
varp.r <- restrict(varp, method = "manual", resmat = mat.r)
varp.r
## 
## VAR Estimation Results:
## ======================= 
## 
## Estimated coefficients for equation dlrsp500: 
## ============================================= 
## Call:
## dlrsp500 = dlrsp500.l1 + dlrsp500.l2 + const 
## 
## dlrsp500.l1 dlrsp500.l2       const 
##   0.4396923  -0.1114173   0.8960807 
## 
## 
## Estimated coefficients for equation dlrGDP: 
## =========================================== 
## Call:
## dlrGDP = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const 
## 
## dlrsp500.l1   dlrGDP.l1 dlrsp500.l2   dlrGDP.l2       const 
##  0.10696945  0.17842715  0.02214091  0.15741572  1.43232603
summary(varp.r)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: dlrsp500, dlrGDP 
## Deterministic variables: const 
## Sample size: 110 
## Log Likelihood: -567.939 
## Roots of the characteristic polynomial:
## 0.4959 0.3338 0.3338 0.3174
## Call:
## VAR(y = y.ts, type = "const", lag.max = 8, ic = "AIC")
## 
## 
## Estimation results for equation dlrsp500: 
## ========================================= 
## dlrsp500 = dlrsp500.l1 + dlrsp500.l2 + const 
## 
##             Estimate Std. Error t value Pr(>|t|)    
## dlrsp500.l1  0.43969    0.09603   4.579 1.27e-05 ***
## dlrsp500.l2 -0.11142    0.09576  -1.164    0.247    
## const        0.89608    0.55279   1.621    0.108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 5.618 on 107 degrees of freedom
## Multiple R-Squared: 0.2053,  Adjusted R-squared: 0.183 
## F-statistic: 9.213 on 3 and 107 DF,  p-value: 1.785e-05 
## 
## 
## Estimation results for equation dlrGDP: 
## ======================================= 
## dlrGDP = dlrsp500.l1 + dlrGDP.l1 + dlrsp500.l2 + dlrGDP.l2 + const 
## 
##             Estimate Std. Error t value Pr(>|t|)    
## dlrsp500.l1  0.10697    0.03968   2.696  0.00818 ** 
## dlrGDP.l1    0.17843    0.10487   1.701  0.09183 .  
## dlrsp500.l2  0.02214    0.04095   0.541  0.58987    
## dlrGDP.l2    0.15742    0.09986   1.576  0.11794    
## const        1.43233    0.33702   4.250 4.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.117 on 105 degrees of freedom
## Multiple R-Squared: 0.6291,  Adjusted R-squared: 0.6115 
## F-statistic: 35.63 on 5 and 105 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##          dlrsp500 dlrGDP
## dlrsp500   32.169  5.413
## dlrGDP      5.413  4.480
## 
## Correlation matrix of residuals:
##          dlrsp500 dlrGDP
## dlrsp500   1.0000 0.4509
## dlrGDP     0.4509 1.0000
varp.r$restrictions
##          dlrsp500.l1 dlrGDP.l1 dlrsp500.l2 dlrGDP.l2 const
## dlrsp500           1         0           1         0     1
## dlrGDP             1         1           1         1     1
Acoef(varp.r)
## [[1]]
##          dlrsp500.l1 dlrGDP.l1
## dlrsp500   0.4396923 0.0000000
## dlrGDP     0.1069695 0.1784272
## 
## [[2]]
##          dlrsp500.l2 dlrGDP.l2
## dlrsp500 -0.11141732 0.0000000
## dlrGDP    0.02214091 0.1574157
# estimate restricted VAR - keep only variables with t-value larger than 2.0
varp.r.ser <- restrict(varp, method = "ser", thresh = 2.0)     
varp.r.ser
## 
## VAR Estimation Results:
## ======================= 
## 
## Estimated coefficients for equation dlrsp500: 
## ============================================= 
## Call:
## dlrsp500 = dlrsp500.l1 
## 
## dlrsp500.l1 
##   0.4236806 
## 
## 
## Estimated coefficients for equation dlrGDP: 
## =========================================== 
## Call:
## dlrGDP = dlrsp500.l1 + dlrGDP.l2 + const 
## 
## dlrsp500.l1   dlrGDP.l2       const 
##   0.1443411   0.2349748   1.6523369
summary(varp.r.ser)   
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: dlrsp500, dlrGDP 
## Deterministic variables: const 
## Sample size: 110 
## Log Likelihood: -571.983 
## Roots of the characteristic polynomial:
## 0.4847 0.4847 0.4237     0
## Call:
## VAR(y = y.ts, type = "const", lag.max = 8, ic = "AIC")
## 
## 
## Estimation results for equation dlrsp500: 
## ========================================= 
## dlrsp500 = dlrsp500.l1 
## 
##             Estimate Std. Error t value Pr(>|t|)    
## dlrsp500.l1  0.42368    0.08694   4.873 3.75e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 5.658 on 109 degrees of freedom
## Multiple R-Squared: 0.1789,  Adjusted R-squared: 0.1714 
## F-statistic: 23.75 on 1 and 109 DF,  p-value: 3.753e-06 
## 
## 
## Estimation results for equation dlrGDP: 
## ======================================= 
## dlrGDP = dlrsp500.l1 + dlrGDP.l2 + const 
## 
##             Estimate Std. Error t value Pr(>|t|)    
## dlrsp500.l1  0.14434    0.03436   4.201 5.53e-05 ***
## dlrGDP.l2    0.23497    0.08679   2.707   0.0079 ** 
## const        1.65234    0.28845   5.728 9.42e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.132 on 107 degrees of freedom
## Multiple R-Squared: 0.6166,  Adjusted R-squared: 0.6059 
## F-statistic: 57.37 on 3 and 107 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##          dlrsp500 dlrGDP
## dlrsp500   32.605  5.509
## dlrGDP      5.509  4.632
## 
## Correlation matrix of residuals:
##          dlrsp500 dlrGDP
## dlrsp500   1.0000 0.4483
## dlrGDP     0.4483 1.0000
varp.r.ser$restrictions
##          dlrsp500.l1 dlrGDP.l1 dlrsp500.l2 dlrGDP.l2 const
## dlrsp500           1         0           0         0     0
## dlrGDP             1         0           0         1     1
Acoef(varp.r.ser)  
## [[1]]
##          dlrsp500.l1 dlrGDP.l1
## dlrsp500   0.4236806         0
## dlrGDP     0.1443411         0
## 
## [[2]]
##          dlrsp500.l2 dlrGDP.l2
## dlrsp500           0 0.0000000
## dlrGDP             0 0.2349748

Comments:The restricted VAR model after removing the lags based on (d), the equation of dlrsp500 is dlrsp500 = dlrsp500.l1 , the equation of dlrGDP is dlrGDP = dlrsp500.l1 + dlrGDP.l2 + const.

  1. Use the VAR model to create a multistep forecast for 2019Q1-2019Q4. Compare your forecast for real GDP growth rate in 2019Q1 with (1) the Federal Bank of New York Nowcast, (2) the GDPNow Federal Bank of Atlanta forecast, and (3) the minimum, the average, and the maximum forecasts in the Wall Street Journal Economic Forecasting Survey.
varp.f <- predict(varp, n.ahead = 8) 
plot(varp.f)

fanchart(varp.f)    

g <- autoplot(varp.f, is.date = TRUE) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    labs(x = "", y = "", title = "Multistep forecast")
ggplotly(g)  
<<<<<<< HEAD

Comments: From the result, we can get our forecast for real GDP growth rate in 2019 Q1 is 2.449631, the most recent of GDP estimate of 2019Q1 in “Federal Bank of New York Nowcast” is about 1.43. the GDP estimate of 2019Q1 in “GDPNow Federal Bank of Atlanta forecast” is 2.7, and the average figure in Wall Street Journal Economic Forecasting Survey is 1.5, the minimum value is 0.5 and the maximum value is 2.94. Therefore, our estimate is in the range of the Wall Street Journal forecasting results, it’s more optimistic than New York forecast, and a little bit less optimistic than the Atlanta forecast.

=======

Comments: From the result, we can get our forecast for real GDP growth rate in 2019 Q1 is 2.449631, the average GDP estimate of 2019Q1 in “Federal Bank of New York Nowcast” is about 1.67. the average GDP estimate of 2019Q1 in “GDPNow Federal Bank of Atlanta forecast” is about 0.3-0.5, and the figure in Wall Street Journal Economic Forecasting Survey is 1.5, all of them are much lower than what we forecast here. The reason is due to the multistep ahead forecast, the error terms are highly correlated, which affect the precision.

>>>>>>> master